# Read the original SPSS (.sav) data set and export it as CSV so that the
# Python translation step below can read it.
path <- "./../../data/PPD_hairCortisol_PlosOne_.sav"
dataset <- read_sav(path) # meta data dropped?
write.csv(dataset, file = "./../../data/data_span.csv")
# ---- Python chunk: translate the column names to English ----
import googletrans as gt
import pandas as pd
import csv

# Load the CSV exported from the SPSS file; its column names are in Spanish.
data = pd.read_csv('./../../data/data_span.csv', encoding="ISO-8859-1")
trans = gt.Translator()

# Translate every column name and log each (original, translated) pair.
# The mapping is collected first and applied in a single rename afterwards, so
# a translated name that happens to equal a later original column name is not
# renamed a second time.
renamed = {}
with open("./../../data/data_programatic_translation.csv", 'w') as f:
    f.write('original, translated\n')
    # csv.writer quotes names that contain commas or quotes, keeping the log
    # a valid two-column CSV; plain names are written exactly as before.
    log_rows = csv.writer(f, lineterminator='\n')
    for old_col in data.columns.values:
        new_col = trans.translate(old_col, src='spanish', dest='en').text
        log_rows.writerow([old_col, new_col])
        renamed[old_col] = new_col

data.rename(columns=renamed, inplace=True)
data.to_csv("./../../data/data_eng.csv")
# Paper Variables
# Load the data set with English column names written by the translation step.
df <- read.csv("./../../data/data_eng.csv", header = TRUE)
#df %>% variable.names()
# THIS IS WHERE WE CAN DROP UNUSED VARIABLES, MUTATE NAMES AND ADD META DATA DESCRIBING VARIABLES (variable attribute data)
# There are 29 rows of all NA variables that were brought in from SPSS empty rows
# NOTE: na.omit() removes every row that contains at least one NA, not only
# rows that are entirely NA.
before <- df %>% nrow()
df <- df %>% na.omit()
after <- df %>% nrow()
# Rows before filtering minus rows remaining, so the count is positive.
print(paste('rows dropped:', before - after))
## [1] "rows dropped: 29"
# ---- Rename the variables used in the paper and describe each one ----
# Each raw column is copied to a tidy snake_case name and the copy is given a
# "Description" attribute. The raw columns stay in `df`; only the copies are
# selected into `df_tidy` at the end of this section.

# -------- DEMOGRAPHICS AND PREGNANCY HISTORY
df <- df %>% mutate(age = Age)
attr(df$age, "Description") <- "The age of the mother"
df <- df %>% mutate(nationality = NationalityDicotomica)
# The description belongs on the new `nationality` column (the one that is
# selected into df_tidy), not on the raw source column.
attr(df$nationality, 'Description') <- "The patient's nationality or country of origin at the time of pregnancy"
#df <- df %>% mutate('Marital_Status' = ) # CANT FIND THIS ONE
df <- df %>% mutate(employed = EmploymentSituationDico)
attr(df$employed, 'Description') <- "Employment situation; is or is not employed at time of pregnancy"
df <- df %>% mutate(occupation = Profession)
attr(df$occupation, "Description") <- 'If employed, what was the mothers occupation at the time of pregnancy'
df <- df %>% mutate(education_level = Level_Studies)
attr(df$education_level, 'Description') <- "The highest level of education that the mother had received at the time of pregnancy/study"
df <- df %>% mutate(sport = Sport)
attr(df$sport, 'Description') <- 'tracking the physical activity of the mother; whether the mother played a sport (during pregnancy) [y/n]' # ???
df <- df %>% mutate(pet = PetDicotomica)
attr(df$pet, 'Description') <- 'designation for mothers with pets at time of pregnancy'
df <- df %>% mutate(dyed_hair = Appearance)
attr(df$dyed_hair, "Description") <- 'designation for mothers with dyed hair during pregnancy' # I THINK this is right, need to check
df <- df %>% mutate(first_pregnancy = FirstPregnancy)
attr(df$first_pregnancy, 'Description') <- 'primiparous; designation for mothers who are having a child for the first time'
df <- df %>% mutate(wanted_pregnancy = WantedPregnancyDico)
attr(df$wanted_pregnancy, 'Description') <- 'Designation for mothers who desired the pregnancy (planned?)'
df <- df %>% mutate(pregnancy_method = PregnancyMethodDico)
attr(df$pregnancy_method, 'Description') <- 'designation for *spontaneous* method of fertilization and *Fertility Treatment* or artificial insemination' # PC term? also, lol on "spontaneous"
df <- df %>% mutate(previous_miscarriage = PreviousMiscarriagesDico)
attr(df$previous_miscarriage, 'Description') <- 'Designation for mothers who have had a miscarriage prior to the current pregnancy'
# df <- df %>% mutate('delivery' = ) # CANT FIND DELIVERY AND LABOR
#df <- mutate('delivery_antisthesia' = ) # Not sure what this one is
df <- df %>% mutate(fetus_sex = SexFetalDico)
attr(df$fetus_sex, "Description") <- 'the sex of the fetus' # numeric mapping ???

# -------- DEPRESSION METRICS
df <- df %>% mutate(postpartum_depression = depreposparto)
attr(df$postpartum_depression, 'Description') <- "Diagnosis of post-partum depression in the mother" ### ???? more?
df <- df %>% mutate(depression_tri1 = DEPRESSION1)
df <- df %>% mutate(depression_tri2 = DEPRESSION2)
df <- df %>% mutate(depression_tri3 = DEPRESSION3)
attr(df$depression_tri1, 'Description') <- 'antenatal depression during the 1st trimester.'
attr(df$depression_tri2, 'Description') <- 'antenatal depression during the 2nd trimester.'
attr(df$depression_tri3, 'Description') <- 'antenatal depression during the 3rd trimester.'
df <- df %>% mutate(epds = EPDS)
attr(df$epds, 'Description') <- 'continuous metric of postnatal depression' #measurement name, type?

# --------- CORTISOL metrics
df <- df %>% mutate(cortisol_tri1 = Cortisol1) #WHAT IS LNCORTISOL1 ????? - Two metrics for cortisol??
df <- df %>% mutate(cortisol_tri2 = Cortisol2)
df <- df %>% mutate(cortisol_tri3 = Cortisol3)
attr(df$cortisol_tri1, 'Description') <- 'Cortisol levels in mothers during pregnancy during the 1st trimester' # units??? look into lncortisol variable, which one should we be using?
attr(df$cortisol_tri2, 'Description') <- 'Cortisol levels in mothers during pregnancy during the 2nd trimester'
attr(df$cortisol_tri3, 'Description') <- 'Cortisol levels in mothers during pregnancy during the 3rd trimester'

# ---------- SELECT
# Keep only the tidy, described columns (each listed once).
df_tidy <- df %>%
  select(
    age, education_level, nationality, postpartum_depression, employed,
    occupation, sport, pet, dyed_hair, first_pregnancy, wanted_pregnancy,
    pregnancy_method, previous_miscarriage, fetus_sex, epds,
    depression_tri1, depression_tri2, depression_tri3,
    cortisol_tri1, cortisol_tri2, cortisol_tri3
  )
#glimpse(df_tidy)
write.csv(df_tidy, file='./../../data/tidy_data.csv') # this might drop Description attribute
save(df_tidy, file='./../../data/tidy_data.Rdata') # this way we can always load it back as is.
# NOTE(review): df_tidy is saved above before the mapping attribute below is
# attached, so the saved copy does not carry it - confirm this is intended.

# Free-text key describing what the numeric codes of the categorical
# variables mean (Spanish name / English name, then code = label).
numeric_mapping <- 'Primer embarazo / First Pregnancy:
0 = No
1 = Yes
Tipo Embarazo / Pregnancy Method:
1 = spontaneously and normally
2 = in vitro fertilization
3 = artificial insemination
Nivel Estudios / level of education
1 = primary
2 = secondary
3 = university students
4 = without studies
Laboral / employee situation
1 = unemployed
2 = full-time job
3 = half-day job
4 = student
5 = work and study
Aspecto Pelo/ Hair Appearance
0 = dyed
1 = natural
Deporte / Sport
0 = No
1 = Yes
Profesión / Profession
1 = unemployment
2 = Head of nurse
3 = Doctor
4 = Nurse
5 = housewife
6 = teacher
7 = management
8 = dental clinic
9 = banking
10 = untrained
11 = psychologist
12 = engineer
Origen / Nationality
1 = Spain
2 = Argentina
3 = Chile
4 = Morocco
5 = Romania
6 = Germany
7 = Russia
8 = Ecuador
9 = Peru
10 = Paraguay
depreposparto / depress post partum
1 = sindepre?
2 = condepre?'
# Attach the key to the tidy data frame as a data-frame-level attribute.
attr(df_tidy, 'numeric mapping information') <- numeric_mapping
knitr::opts_chunk$set(warning = TRUE, message = TRUE, echo = FALSE) #this has to go before the codebook() call, make sure echo=FALSE is included.
# Generate the codebook for the tidy data set.
# (TAKES FOREVER TO KNIT) This will produce a codebook, but we need to have
# this knitted in its own html file and not echo the code chunks...
my_codebook <- codebook(df_tidy)
#my_codebook
## Warning in codebook(df_tidy): The variables session, created, ended have
## to be defined for automatic survey repetition detection to work. Set to no
## repetition by default.
## No missings.

# Turn code echoing back on for the chunks that follow.
knitr::opts_chunk$set(echo = TRUE, message = TRUE, warning = TRUE)

# Write the codebook text to a scratch Rmd file and render that file to a
# standalone HTML document in the current working directory.
fh <- file("tmp.Rmd", open = "wt")
writeLines(my_codebook, con = fh)
close(fh)
render(
  input = 'tmp.Rmd',
  output_file = 'group7_codebook.html',
  output_dir = getwd()
)
##
##
## processing file: tmp.Rmd
##
|
| | 0%
|
|.................................................................| 100%
## ordinary text without R code
## output file: tmp.knit.md
## "C:/Program Files/RStudio/bin/pandoc/pandoc" +RTS -K512m -RTS tmp.utf8.md --to html4 --from markdown+autolink_bare_uris+ascii_identifiers+tex_math_single_backslash --output pandoc102479f27cc5.html --smart --email-obfuscation none --self-contained --standalone --section-divs --template "C:\R-3.5.1\library\rmarkdown\rmd\h\default.html" --no-highlight --variable highlightjs=1 --variable "theme:bootstrap" --include-in-header "C:\Users\natha\AppData\Local\Temp\RtmpqE23hE\rmarkdown-str102417db1a45.html" --mathjax --variable "mathjax-url:https://mathjax.rstudio.com/latest/MathJax.js?config=TeX-AMS-MML_HTMLorMML"
##
## Output created: group7_codebook.html
[1] codebook package:
Preprint Arslan, R. C. (2018). How to automatically generate rich codebooks from study metadata. doi:10.31234/osf.io/5qc6h
Zenodo Arslan, R. C. (2018). Automatic codebooks from survey metadata (2018). URL https://github.com/rubenarslan/codebook. DOI